{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "from sklearn.utils import column_or_1d\n",
    "data_path='./diabetes.csv'\n",
    "dataset=pd.read_csv(data_path)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "pd.set_option('display.max_columns', None)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
      "0            6      148             72             35        0  33.6   \n",
      "1            1       85             66             29        0  26.6   \n",
      "2            8      183             64              0        0  23.3   \n",
      "3            1       89             66             23       94  28.1   \n",
      "4            0      137             40             35      168  43.1   \n",
      "5            5      116             74              0        0  25.6   \n",
      "6            3       78             50             32       88  31.0   \n",
      "7           10      115              0              0        0  35.3   \n",
      "8            2      197             70             45      543  30.5   \n",
      "9            8      125             96              0        0   0.0   \n",
      "\n",
      "   DiabetesPedigreeFunction  Age  Outcome  \n",
      "0                     0.627   50        1  \n",
      "1                     0.351   31        0  \n",
      "2                     0.672   32        1  \n",
      "3                     0.167   21        0  \n",
      "4                     2.288   33        1  \n",
      "5                     0.201   30        0  \n",
      "6                     0.248   26        1  \n",
      "7                     0.134   29        0  \n",
      "8                     0.158   53        1  \n",
      "9                     0.232   54        1  \n"
     ]
    }
   ],
   "source": [
    "print(dataset.head(10))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [],
   "source": [
    "y = dataset.get('Outcome')\n",
    "X = dataset.drop('Outcome', axis=1)\n",
    "rf = RandomForestClassifier()\n",
    "\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [],
   "source": [
    "from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report, \\\n",
    "    accuracy_score\n",
    "\n",
    "# from yellowbrick import ROCAUC\n",
    "# from yellowbrick.classifier import ClassPredictionError, ClassificationReport, ConfusionMatrix, DiscriminationThreshold\n",
    "# from yellowbrick.model_selection import LearningCurve, CVScores, FeatureImportances, RFECV, ValidationCurve"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [],
   "source": [
    "train_data=dataset.iloc[:-70,:]\n",
    "test_data=dataset.iloc[-70:,:]\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
      "698            4      127             88             11      155  34.5   \n",
      "699            4      118             70              0        0  44.5   \n",
      "700            2      122             76             27      200  35.9   \n",
      "701            6      125             78             31        0  27.6   \n",
      "702            1      168             88             29        0  35.0   \n",
      "..           ...      ...            ...            ...      ...   ...   \n",
      "763           10      101             76             48      180  32.9   \n",
      "764            2      122             70             27        0  36.8   \n",
      "765            5      121             72             23      112  26.2   \n",
      "766            1      126             60              0        0  30.1   \n",
      "767            1       93             70             31        0  30.4   \n",
      "\n",
      "     DiabetesPedigreeFunction  Age  Outcome  \n",
      "698                     0.598   28        0  \n",
      "699                     0.904   26        0  \n",
      "700                     0.483   26        0  \n",
      "701                     0.565   49        1  \n",
      "702                     0.905   52        1  \n",
      "..                        ...  ...      ...  \n",
      "763                     0.171   63        0  \n",
      "764                     0.340   27        0  \n",
      "765                     0.245   30        0  \n",
      "766                     0.349   47        1  \n",
      "767                     0.315   23        0  \n",
      "\n",
      "[70 rows x 9 columns]\n"
     ]
    }
   ],
   "source": [
    "print(test_data)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [],
   "source": [
    "x_train=train_data.iloc[:,:8]\n",
    "y_train=train_data.iloc[:,8:9]\n",
    "x_test=test_data.iloc[:,:8]\n",
    "y_test=test_data.iloc[:,8:9]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\masaikk\\anaconda3\\envs\\week4\\lib\\site-packages\\sklearn\\utils\\validation.py:63: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  return f(*args, **kwargs)\n"
     ]
    }
   ],
   "source": [
    "model = rf.fit(x_train, column_or_1d(y_train, warn=True))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [],
   "source": [
    "y_pred = model.predict(x_test)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "data": {
      "text/plain": "array([0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,\n       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n       0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n       0, 0, 1, 0], dtype=int64)"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准确率:0.7857142857142857\n",
      "精确率:0.7727272727272727\n"
     ]
    },
    {
     "data": {
      "text/plain": "array([0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,\n       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n       0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n       0, 0, 1, 0], dtype=int64)"
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy_score_value = accuracy_score(y_test, y_pred)\n",
    "print(f\"准确率:{accuracy_score_value}\")\n",
    "\n",
    "precision_score_value = precision_score(y_test, y_pred)\n",
    "print(f\"精确率:{precision_score_value}\")\n",
    "# 做出预测\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
      "698            4      127             88             11      155  34.5   \n",
      "699            4      118             70              0        0  44.5   \n",
      "700            2      122             76             27      200  35.9   \n",
      "701            6      125             78             31        0  27.6   \n",
      "702            1      168             88             29        0  35.0   \n",
      "..           ...      ...            ...            ...      ...   ...   \n",
      "763           10      101             76             48      180  32.9   \n",
      "764            2      122             70             27        0  36.8   \n",
      "765            5      121             72             23      112  26.2   \n",
      "766            1      126             60              0        0  30.1   \n",
      "767            1       93             70             31        0  30.4   \n",
      "\n",
      "     DiabetesPedigreeFunction  Age  Outcome  pred  \n",
      "698                     0.598   28        0     0  \n",
      "699                     0.904   26        0     1  \n",
      "700                     0.483   26        0     0  \n",
      "701                     0.565   49        1     0  \n",
      "702                     0.905   52        1     1  \n",
      "..                        ...  ...      ...   ...  \n",
      "763                     0.171   63        0     0  \n",
      "764                     0.340   27        0     0  \n",
      "765                     0.245   30        0     0  \n",
      "766                     0.349   47        1     1  \n",
      "767                     0.315   23        0     0  \n",
      "\n",
      "[70 rows x 10 columns]\n"
     ]
    }
   ],
   "source": [
    "y_pred_dataframe=pd.DataFrame(y_pred,columns=['pred'],index=[i+698 for i in range(70)])\n",
    "# print(y_pred_dataframe)\n",
    "# print(test_data)\n",
    "# y_pred_dataframe\n",
    "# print(y_pred_dataframe)\n",
    "# print(test_data)\n",
    "y_output=pd.concat([test_data,y_pred_dataframe], axis=1)\n",
    "print(y_output)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "outputs": [],
   "source": [
    "y_output.to_csv('./pima_pred.csv',encoding='utf-8',index=False)\n",
    "# 将数据写入文件"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "from boto3.session import Session\n",
    "from botocore.exceptions import ClientError\n",
    "class S3Handler():\n",
    "    def __init__(self):\n",
    "        access_key = \"29E2BC5B6851CD32568A\"\n",
    "        secret_key = \"WzVDQTFCMjlBMjU2NzQ4MTlFMTU3MjdDMzMyQTg1\"\n",
    "        url = \"http://scut.depts.bingosoft.net:29997\"  # 也可以是自己节点的地址\n",
    "        session = Session(access_key, secret_key)\n",
    "        self.s3_client = session.client('s3', endpoint_url=url)\n",
    "\n",
    "    def download_file(self, bucketName, objectName, fileName):\n",
    "        \"\"\"\n",
    "        下载文件\n",
    "        :param bucketName: 桶的名称\n",
    "        :param objectName: 文件的路径\n",
    "        :param fileName: 下载完成的文件的名称----注意：下载之后的文件默认储存在自己的python工程路径下\n",
    "        :return:\n",
    "        \"\"\"\n",
    "        self.s3_client.download_file(bucketName, objectName, fileName)\n",
    "\n",
    "    def upload_file(self, file_name, bucket, object_name=None):\n",
    "        \"\"\"\n",
    "        上传文件\n",
    "        :param file_name: 需要上传的文件的名称\n",
    "        :param bucket: S3中桶的名称\n",
    "        :param object_name: 需要上传到的路径，例如file/localfile/test\n",
    "        :return:\n",
    "        \"\"\"\n",
    "        if object_name is None:\n",
    "            object_name = file_name\n",
    "        try:\n",
    "            self.s3_client.upload_file(file_name, bucket, object_name, ExtraArgs={'ACL': 'public-read'})\n",
    "        except ClientError as e:\n",
    "            logging.error(e)\n",
    "            return False\n",
    "        return True\n",
    "\n",
    "    def list_object(self, bucketName):\n",
    "        \"\"\"\n",
    "        列出当前桶下所有的文件\n",
    "        :param bucketName:\n",
    "        :return:\n",
    "        \"\"\"\n",
    "        file_list = []\n",
    "        response = self.s3_client.list_objects_v2(\n",
    "            Bucket=bucketName,\n",
    "            # MaxKeys=1000  # 返回数量，如果为空则为全部\n",
    "        )\n",
    "        file_desc = response['Contents']\n",
    "        for f in file_desc:\n",
    "            print('file_name:{},file_size:{}'.format(f['Key'], f['Size']))\n",
    "            file_list.append(f['Key'])\n",
    "        return file_list\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### 导入了boto的包准备上传文件"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "outputs": [
    {
     "data": {
      "text/plain": "True"
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s3obj=S3Handler()\n",
    "s3obj.upload_file('pima_pred.csv','hujianqiao03')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}